{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import env"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "import tvm\n",
    "from tvm import te\n",
    "from tvm import rpc\n",
    "from tvm.contrib import utils\n",
    "\n",
    "n = tvm.runtime.convert(1024)\n",
    "A = te.placeholder((n,), name=\"A\")\n",
    "B = te.compute((n,), lambda i: A[i] + 1.0, name=\"B\")\n",
    "s = te.create_schedule(B.op)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using built-in specs.\n",
      "COLLECT_GCC=gcc\n",
      "COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper\n",
      "OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa\n",
      "OFFLOAD_TARGET_DEFAULT=1\n",
      "Target: x86_64-linux-gnu\n",
      "Configured with: ../src/configure -v --with-pkgversion='Ubuntu 11.4.0-2ubuntu1~20.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-11 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib --enable-libphobos-checking=release --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --enable-cet --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-11-PfdVzN/gcc-11-11.4.0/debian/tmp-nvptx/usr,amdgcn-amdhsa=/build/gcc-11-PfdVzN/gcc-11-11.4.0/debian/tmp-gcn/usr --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu --with-build-config=bootstrap-lto-lean --enable-link-serialization=2\n",
      "Thread model: posix\n",
      "Supported LTO compression algorithms: zlib zstd\n",
      "gcc version 11.4.0 (Ubuntu 11.4.0-2ubuntu1~20.04) \n"
     ]
    }
   ],
   "source": [
    "!gcc -v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# target = \"llvm\"\n",
    "target = \"llvm -mtriple=x86_64-linux-gnu\"\n",
    "\n",
    "func = tvm.build(s, [A, B], target=target, name=\"add_one\")\n",
    "# save the lib at a local temp folder\n",
    "temp = utils.tempdir()\n",
    "path = temp.relpath(\"lib.tar\")\n",
    "func.export_library(path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 通过 RPC 远程运行CPU内核\n",
    "\n",
    "将展示如何在远程设备上运行生成的 CPU 内核。\n",
    "\n",
    "首先，从远程设备获取 RPC 会话。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# remote = rpc.LocalSession() # 仿真\n",
    "# 以下是我的环境，请将其更改为您的目标设备 IP 地址。\n",
    "host = \"10.16.11.15\"\n",
    "port = 9090\n",
    "remote = rpc.connect(host, port)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "将库上传到远程设备，然后调用设备本地的编译器重新链接它们。现在 `func` 是远程模块对象。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "remote.upload(path)\n",
    "func = remote.load_module(\"lib.tar\")\n",
    "\n",
    "# 在远程设备上创建数组\n",
    "dev = remote.cpu()\n",
    "a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)\n",
    "b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)\n",
    "# 该函数将在远程设备上运行。\n",
    "func(a, b)\n",
    "np.testing.assert_equal(b.numpy(), a.numpy() + 1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "当你想评估远程设备上内核的性能时，重要的是要避免网络开销。`time_evaluator` 将返回远程函数，该函数在远程设备上多次运行该函数，测量每次运行的成本，并返回测量到的成本。网络开销被排除在外。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.146e-07 secs/op\n"
     ]
    }
   ],
   "source": [
    "time_f = func.time_evaluator(func.entry_name, dev, number=10)\n",
    "cost = time_f(a, b).mean\n",
    "print(f\"{cost:g} secs/op\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 通过 RPC 远程运行 OpenCL 内核\n",
    "\n",
    "对于远程 OpenCL 设备，工作流程几乎与上述相同。\n",
    "\n",
    "您可以定义内核，上传文件，并通过 RPC 运行。\n",
    "\n",
    "```{note}\n",
    ":class: alert alert-info\n",
    "树莓派不支持 OpenCL，以下代码已在 Firefly-RK3399 上测试。您可以参考此 [教程](https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2)为RK3399设置操作系统和OpenCL驱动。\n",
    "```\n",
    "\n",
    "还需要构建在rk3399板上启用了OpenCL的运行时。在 TVM 根目录下执行以下命令：\n",
    "\n",
    "```bash\n",
    "cp cmake/config.cmake .\n",
    "sed -i \"s/USE_OPENCL OFF/USE_OPENCL ON/\" config.cmake\n",
    "make runtime -j4\n",
    "```\n",
    "\n",
    "以下函数展示了如何远程运行 OpenCL 内核："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def run_opencl(opencl_device_host = \"10.77.1.145\"):\n",
    "    # NOTE: 这是我的rk3399开发板的设置。您需要根据您的环境进行相应的修改。\n",
    "    opencl_device_port = 9090\n",
    "    target = tvm.target.Target(\"opencl\", host=\"llvm -mtriple=x86_64-linux-gnu\")\n",
    "\n",
    "    # 为上述 \"add one\" 计算声明创建调度\n",
    "    s = te.create_schedule(B.op)\n",
    "    xo, xi = s[B].split(B.op.axis[0], factor=32)\n",
    "    s[B].bind(xo, te.thread_axis(\"blockIdx.x\"))\n",
    "    s[B].bind(xi, te.thread_axis(\"threadIdx.x\"))\n",
    "    func = tvm.build(s, [A, B], target=target)\n",
    "\n",
    "    remote = rpc.connect(opencl_device_host, opencl_device_port)\n",
    "\n",
    "    # export and upload\n",
    "    path = temp.relpath(\"lib_cl.tar\")\n",
    "    func.export_library(path)\n",
    "    remote.upload(path)\n",
    "    func = remote.load_module(\"lib_cl.tar\")\n",
    "\n",
    "    # run\n",
    "    dev = remote.cl()\n",
    "    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)\n",
    "    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), dev)\n",
    "    func(a, b)\n",
    "    np.testing.assert_equal(b.numpy(), a.numpy() + 1)\n",
    "    print(\"OpenCL test passed!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OpenCL test passed!\n"
     ]
    }
   ],
   "source": [
    "run_opencl(opencl_device_host = \"10.16.11.15\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ai",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
